CPAN->GREP


1 to 25 of 772 distributions (8.73 seconds)
Lingua-NATools-v0.7.8/Build.PL
                      't/bin/*.o', 't/bin/*.exe',
                      't/bin/corpus', 't/bin/words',
                      'Lingua-NATools-*',
AMBS/Lingua-NATools-v0.7.8 64 more files »
Text-Mining-0.08/Text-Mining/lib/Text/Mining.pm
	sub create_corpus             { my ( $self, $arg_ref ) = @_; return Text::Mining::Corpus->new( $arg_ref ); }
	sub get
            { my ( $self, $arg_ref ) = @_; return Text::Mining::Corpus->new( $arg_ref ); }
	sub get_corpus                { my ( $self, $arg_ref ) = @_; return Text::Mining::Corpus->new( $arg_ref ); }
	sub 
         { my ( $self, $arg_ref ) = @_; return Text::Mining::Corpus->new( $arg_ref ); }
	sub delete_corpus             { my ( $self, $arg_ref ) = @_; my $corpus = Text::Mining::Corpus->new(); return $corpus
75 more matches »
ROGERHALL/Text-Mining-0.08 20 more files »
uplug-main-0.3.8/Uplug-0.3.8/bin/uplug-convert
		'language' => 1,
		'corpus' => 1,
		);
TIEDEMANN/uplug-main-0.3.8 93 more files »
Lingua-Ogmios-0.01/etc/ogmios/nlpplatform-demo.rc
#     NO_STD_XML_OUTPUT = 2 # termlistoutput
#      NO_STD_XML_OUTPUT = 3 # HTML output (corpus and tagged terms)
#       NO_STD_XML_OUTPUT = 5 # Txt output
THHAMON/Lingua-Ogmios-0.01 34 more files »
Algorithm-VSM-1.62/examples/calculate_precision_and_recall_for_LSA.pl
my $corpus_dir = "corpus";                     # This is the directory containing
                                               # the corpus
my $corpus_dir = "corpus";                     # This is the directory containing
                                               # the corpus
my $corpus_dir = "corpus";                     # This is the directory containing
                                               # the corpus
5 more matches »
AVIKAK/Algorithm-VSM-1.62 17 more files »
Text-SenseClusters-1.03/discriminate.pl
Removes features that occur less than F times in the training corpus.
	# check if the training file is senseval2 formatted file - if yes quit.
	if($inp_str =~ m/<corpus/i && $inp_str =~ m/<lexelt/i && $inp_str =~ m/<instance/i && $inp_str =~ m/<context/i)
	{
TPEDERSE/Text-SenseClusters-1.03 125 more files »
Lingua-Align-0.04/bin/add_english_treetags
#
# add tree tagger tags & lemmas to an english tigerXML corpus
#
TIEDEMANN/Lingua-Align-0.04 53 more files »
Lingua-YaTeA-0.622/bin/yatea
yatea - Perl script for extracting terms from a corpus of texts and
providing a syntactic analysis in a head-modifier representation.
=item    I<file>               corpus of texts in Flemm or TreeTagger output format
YaTeA aims at extracting noun phrases that look like terms from a
corpus. It also provides their syntactic analysis in a head-modifier
format.
27 more matches »
THHAMON/Lingua-YaTeA-0.622 25 more files »
Lingua-BrillTagger-0.02/lib/Lingua/BrillTagger.xs
			     the training set  
			     When training on a very small corpus, better
			     performance might be obtained by setting this to
void
_load_into_corpus( self, word )
     SV   * self
KWILLIAMS/Lingua-BrillTagger-0.02 1 more file »
Text-Corpus-NewYorkTimes-1.01/lib/Text/Corpus/NewYorkTimes.pm
#12345678901234567890123456789012345678901234
#Interface to New York Times corpus.
C<Text::Corpus::NewYorkTimes> - Interface to New York Times corpus.
  Log::Log4perl->easy_init ($INFO);
  my $corpus = Text::Corpus::NewYorkTimes->new (fileList => $fileList, corpusDirectory => $corpusDirectory);
  dump $corpus->getTotalDocuments;
120 more matches »
KUBINA/Text-Corpus-NewYorkTimes-1.01 6 more files »
Text-Corpus-CNN-1.02/lib/Text/Corpus/CNN.pm
#12345678901234567890123456789012345678901234
#Make a corpus of CNN documents for research.
C<Text::Corpus::CNN> - Make a corpus of CNN documents for research.
  Log::Log4perl->easy_init ($INFO);
  my $corpusDirectory = File::Spec->catfile (getcwd(), 'corpus_cnn');
  my $corpus = Text::Corpus::CNN->new (corpusDirectory => $corpusDirectory);
65 more matches »
KUBINA/Text-Corpus-CNN-1.02 7 more files »
Alvis-TermTagger-0.8/bin/TermTagger.pl
TermTagger.pl -- A Perl script for tagging corpus with terms
TermTagger.pl [options] corpus termlist selected_term_list
This script tags a corpus with terms. Corpus (C<corpus>) is a file
with one sentence per line. Term list (C<termlist>) is a file
1 more match »
THHAMON/Alvis-TermTagger-0.8 10 more files »
Text-Corpus-VoiceOfAmerica-1.03/lib/Text/Corpus/VoiceOfAmerica.pm
C<Text::Corpus::VoiceOfAmerica> - Make a corpus of VOA documents for research.
  Log::Log4perl->easy_init ($INFO);
  my $corpusDirectory = File::Spec->catfile (getcwd(), 'corpus_voa');
  my $corpus = Text::Corpus::VoiceOfAmerica->new (corpusDirectory => $corpusDirectory);
  Log::Log4perl->easy_init ($INFO);
  my $corpusDirectory = File::Spec->catfile (getcwd(), 'corpus_voa');
  my $corpus = Text::Corpus::VoiceOfAmerica->new (corpusDirectory => $corpusDirectory);
60 more matches »
KUBINA/Text-Corpus-VoiceOfAmerica-1.03 7 more files »
Alt-CWB-ambs-2.2.102.4/Changes
TODO:
  - implement tests for the new CWB::CQP interface, using the included VSS corpus
  - complete reorganisation of CWB/Perl modules into packages CWB (utility functions,
    corpus encoding, CQP interface) and CWB-CL (API for low-level corpus access);
    WebCqp functionality and
 packages CWB (utility functions,
    corpus encoding, CQP interface) and CWB-CL (API for low-level corpus access);
    WebCqp functionality and demo Web interface will be released as a separate package
AMBS/Alt-CWB-ambs-2.2.102.4 15 more files »
Dist-Zilla-5.037/dist.ini
[MetaNoIndex]
dir = corpus
dir = misc
parent  = 0 ; used by the AutoPrereq test corpus
RJBS/Dist-Zilla-5.037 41 more files »
Text-Corpus-Inspec-1.00/lib/Text/Corpus/Inspec.pm
#12345678901234567890123456789012345678901234
#Interface to Inspec abstracts corpus.
C<Text::Corpus::Inspec> - Interface to Inspec abstracts corpus.
  Log::Log4perl->easy_init ($INFO);
  my $corpus = Text::Corpus::Inspec->new (corpusDirectory => $corpusDirectory);
  dump $corpus->getTotalDocuments;
70 more matches »
KUBINA/Text-Corpus-Inspec-1.00 6 more files »
Text-Corpus-Summaries-Wikipedia-0.22/lib/Text/Corpus/Summaries/Wikipedia.pm
  use Data::Dump qw(dump);
  my $corpus = Text::Corpus::Summaries::Wikipedia->new;
  $corpus->create;
  my $corpus = Text::Corpus::Summaries::Wikipedia->new;
  $corpus->create;
  dump $corpus->getListOfXmlFiles;
  $corpus->create;
  dump $corpus->getListOfXmlFiles;
46 more matches »
KUBINA/Text-Corpus-Summaries-Wikipedia-0.22 4 more files »
Alt-CWB-CL-ambs-2.2.102.0/CL.xs
Corpus *
cl_new_corpus(registry_dir, registry_name)
    char *  registry_dir
int
cl_delete_corpus(corpus)
    Corpus *    corpus
int
cl_delete_corpus(corpus)
    Corpus *    corpus
3 more matches »
AMBS/Alt-CWB-CL-ambs-2.2.102.0 9 more files »
Lingua-Interset-2.043/Changes
atures. Thanks to Saša Rosen, who tries to
use DZ Interset together with a multi-language parallel corpus called
Intercorp, we also created a driver for the IPI PAN Polish corpus, which in
 a multi-language parallel corpus called
Intercorp, we also created a driver for the IPI PAN Polish corpus, which in
turn caused one systemic change: o-tags (those setting the other feature) can
References). Dan added a driver for the Czech tags of the Multext East
multilingual corpus.
ZEMAN/Lingua-Interset-2.043 61 more files »
Unicode-Tussle-1.11/data/words.utf8
          	blood-bay [adj.] ← blood
bloodbeat             	 › blood-beat, -circulation, -clot, -corpuscle, -disease, -drop, -flow, -freezer, -gout, -mark, -spoor, -spot, -stream, -supply, -system, blood
ing, -monger, -offering, -seller, -wreaker, blood-curdling, -stirring, -stirringness ← blood
bloodcorpuscles       	 › blood-corpuscles, lymph-c, Malpighian corpuscles, splenic c, Pacinian c, c. of Vate
ler, -wreaker, blood-curdling, -stirring, -stirringness ← blood
bloodcorpuscles       	 › blood-corpuscles, lymph-c, Malpighian corpuscles, splenic c, Pacinian c, c. of Vater ← corpuscle
bloodcount   
105 more matches »
BDFOY/Unicode-Tussle-1.11
Alvis-NLPPlatform-0.6/bin/alvis-nlp-standalone
alvis-nlp-standalone - Perl script for linguistically annotating a corpus contained in a file
THHAMON/Alvis-NLPPlatform-0.6 6 more files »
Lingua-EN-Inflexion-0.000004/lib/Lingua/EN/Inflexion.pm
              # "7 formulas found"
              # "7 corpuses found"
              # "7 brothers found"
DCONWAY/Lingua-EN-Inflexion-0.000004 7 more files »
String-Sections-0.3.2/corpus/template/parse_filehandle.tpl
my $corpus;
my $parsefiles;
};
nofatals 'resolve corpus dir' => sub {
  $corpus = path($FindBin::Bin)->parent->parent->parent->child('corpus');
nofatals 'resolve corpus dir' => sub {
  $corpus = path($FindBin::Bin)->parent->parent->parent->child('corpus');
};
2 more matches »
KENTNL/String-Sections-0.3.2 16 more files »
WordNet-Similarity-2.05/doc/config.pod
information content file containing the frequency of occurrence of every
WordNet concept in a large corpus. A number of utility programs are
included in this distribution that can be used to generate an inf
TPEDERSE/WordNet-Similarity-2.05 18 more files »
Lingua-BioYaTeA-0.11/bin/bioyatea
# my $current_dir = `pwd`;
my $corpus_path = $ARGV[$#ARGV];
    open($fh, ">$preProcessingFile") or die "can not open file $preProcessingFile to record corrected file";
    $preProc->process_file($corpus_path, $fh);
    close($fh);
    close($fh);
    $corpus_path = $preProcessingFile;
}
14 more matches »
BIBLIOME/Lingua-BioYaTeA-0.11 18 more files »
Home · About