#!/usr/bin/env python
'''Keeps GDep available for processing multiple sentences/texts.

Get GDep from: http://www.cs.cmu.edu/~sagae/parser/gdep

The advantage of this module is that GDep is readily available once it's
loaded. So you don't have to lose time reading in the modules for each
different call to GDep.

USAGE

>>> parser = Parser('gdep-beta1')
>>> print(parser.parse('The The cat smiles .\\nShe is lovely .'))
1 The The B-NP DT O 4 NMOD
2 The The I-NP DT O 4 NMOD
3 cat cat I-NP NN O 4 NMOD
4 smiles smile I-NP NNS O 0 ROOT
5 . . O . O 4 P

1 She She B-NP PRP O 2 SUB
2 is be B-VP VBZ O 0 ROOT
3 lovely lovely B-ADJP JJ O 2 PRD
4 . . O . O 2 P

# License: GNU General Public License, see http://www.clips.ua.ac.be/~vincent/scripts/LICENSE.txt
'''
__date__ = 'October 2010'
__version__ = '1.0'
__author__ = 'Vincent Van Asch'
__url__ = 'http://www.clips.ua.ac.be/~vincent/software.html'

import os
import sys
import time
import subprocess


class Parser(subprocess.Popen):
    '''A GDep interface object.

    The object is the running ``gdep -nt`` child process itself (it
    subclasses ``subprocess.Popen``), with its stdin/stdout connected to
    pipes and its stderr discarded.

    Note: Creating a Parser takes 15secs startup time by default.
    '''

    def __init__(self, gdepdir, startup_wait=15):
        '''Start the GDep binary and wait for it to come up.

        gdepdir: a directory where a working binary of Gdep resides.
        startup_wait: number of seconds to sleep after launching the
            binary before returning (default 15, the original fixed value).

        Raises whatever ``subprocess.Popen`` raises when the binary cannot
        be started (e.g. OSError).
        '''
        self.gdepdir = os.path.abspath(os.path.expanduser(gdepdir))
        command = [os.path.join(self.gdepdir, 'gdep'), '-nt']

        # Dump STDERR. The child gets its own copy of the descriptor, so
        # the parent's handle can be closed as soon as Popen is set up.
        with open(os.devnull, 'w') as null:
            # Run the child from inside gdepdir via cwd= instead of
            # chdir-ing the whole calling process there and back.
            subprocess.Popen.__init__(self, command,
                                      cwd=self.gdepdir,
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      stderr=null)

        time.sleep(startup_wait)

    def __repr__(self):
        return '<Parser %s>' % self.gdepdir

    def write(self, string):
        '''Send a UTF8 encoded string to the parser.

        The string is stripped and terminated with a single newline. Text
        (unicode) input is encoded as UTF-8; byte strings are sent as is.
        '''
        data = string.strip()
        if not isinstance(data, bytes):
            data = data.encode('utf-8')
        self.stdin.write(data + b'\n')
        # Without a flush a buffered pipe keeps the data in this process
        # and the following read would block forever.
        self.stdin.flush()

    def readline(self):
        '''Read one output line from the parser.

        Returns the line as a native ``str``; bytes coming from the pipe
        are decoded as UTF-8.
        '''
        line = self.stdout.readline()
        if not isinstance(line, str):
            line = line.decode('utf-8')
        return line

    def parse(self, string):
        '''Takes a string and returns the parser's answer.

        string: tokenized sentences on newlines.

        Returns the collected parser output with surrounding whitespace
        stripped.
        '''
        # Send to parser
        string = string.strip()
        self.write(string)

        # One output line is collected per whitespace-separated token plus
        # one per newline in the input.
        expected = len(string.split()) + string.count('\n')
        lines = [self.readline() for _ in range(expected)]

        # One more line is read and discarded, exactly as the original
        # loop did -- presumably the blank line GDep prints after the last
        # sentence (TODO confirm against GDep's output format).
        self.readline()

        return ''.join(lines).strip()


if __name__ == '__main__':
    print(__doc__)