@prefix clarin_el: <http://w3id.org/clarin_el_dictionary/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix omtd: <http://w3id.org/meta-share/omtd-share/> .

# SKOS concept for the tokenization task.
# Predicates are grouped as: type, labels, documentation, hierarchy, scheme, mapping.
clarin_el:tokenization
  a skos:Concept ;
  # One preferred label per language tag.
  skos:prefLabel "tokenization"@en ,
                 "επισημείωση λεκτικών μονάδων"@el ;
  # Alternative labels; the British spellings carry the en-GB tag.
  skos:altLabel "segmentation"@en ,
                "segmentation and tokenization"@en ,
                "tokenisation"@en-GB ,
                "segmentation and tokenisation"@en-GB ;
  skos:definition "the task/process of recognizing and tagging tokens (words, punctuation marks, digits etc.) in a text"@en ;
  skos:note "Tokenization is commonly seen as an independent process of linguistic analysis, in which the input stream of characters is segmented into an ordered sequence of word-like units, usually called tokens, which function as input items for subsequent steps of linguistic processing. Tokens may correspond to words, numbers, punctuation marks or even proper names. The recognized tokens are usually classified according to their syntax. Since the notion of tokenization seems to have different meanings to different people, some tokenization tools fulfil additional tasks like for instance sentence boundary detection, handling of end-line hyphenations or conjoined clitics and contractions."@en ;
  # Parent concept in the hierarchy.
  skos:broader clarin_el:structuralAnnotation ;
  skos:inScheme clarin_el:lrtInfrastructureScheme ;
  # Mapping to the corresponding concept in the omtd: namespace.
  skos:exactMatch omtd:Tokenization .

# Concept scheme that clarin_el:tokenization is attached to via skos:inScheme.
clarin_el:lrtInfrastructureScheme a skos:ConceptScheme ;
  skos:prefLabel "CLARIN:EL Dictionary"@en ,
                 "Λεξικό CLARIN:EL"@el .

# Concept that lists clarin_el:tokenization as a narrower concept.
clarin_el:structuralAnnotation a skos:Concept ;
  skos:prefLabel "δομική επισημείωση"@el ,
                 "structural annotation"@en ;
  skos:narrower clarin_el:tokenization .

