@@ -227,6 +227,13 @@ def analyze_repository(self, repo_path: str, repo_name: str = None, github_url:
227227 self ._analyze_git_history (repo_path , metadata )
228228 self ._analyze_code_metrics (repo_path , metadata )
229229 self ._extract_features_and_usage (repo_path , metadata )
230+
231+ # ENHANCED: Add intelligent content extraction
232+ self ._extract_intelligent_description (repo_path , metadata )
233+ self ._detect_advanced_project_features (repo_path , metadata )
234+ self ._generate_smart_installation_instructions (repo_path , metadata )
235+ self ._extract_code_usage_examples (repo_path , metadata )
236+
230237 self ._calculate_quality_score (metadata )
231238
232239 duration = (time .time () - start_time ) * 1000
@@ -745,4 +752,286 @@ def _detect_license_type(self, license_path: Path) -> str:
745752 else :
746753 return 'Custom'
747754 except Exception :
748- return 'Unknown'
755+ return 'Unknown'
756+
757+ def _extract_intelligent_description (self , repo_path : str , metadata : ProjectMetadata ):
758+ """Extract intelligent project description from multiple sources."""
759+ descriptions = []
760+
761+ # 1. Check existing README for tagline
762+ readme_path = Path (repo_path ) / 'README.md'
763+ if readme_path .exists ():
764+ try :
765+ content = readme_path .read_text (encoding = 'utf-8' )
766+ lines = content .split ('\n ' )
767+ for i , line in enumerate (lines [:15 ]): # Check first 15 lines
768+ if line .startswith ('>' ): # Often used for taglines
769+ desc = line .strip ('> ' ).strip ()
770+ if len (desc ) > 15 and not any (skip in desc .lower () for skip in ['installation' , 'usage' , 'getting started' ]):
771+ descriptions .append (desc )
772+ elif i > 0 and line and not line .startswith ('#' ) and not line .startswith ('[' ) and not line .startswith ('!' ):
773+ if 20 < len (line ) < 200 and not line .startswith ('```' ):
774+ descriptions .append (line .strip ())
775+ # Look for description after project name
776+ if line .startswith ('# ' ) and i + 1 < len (lines ):
777+ next_line = lines [i + 1 ].strip ()
778+ if next_line and not next_line .startswith ('#' ) and len (next_line ) > 20 :
779+ descriptions .append (next_line )
780+ except :
781+ pass
782+
783+ # 2. Check main module docstrings (Python)
784+ if metadata .primary_language == 'python' :
785+ main_files = ['main.py' , 'app.py' , '__init__.py' , f'{ metadata .name } .py' ]
786+ for filename in main_files :
787+ filepath = Path (repo_path ) / filename
788+ if filepath .exists ():
789+ try :
790+ content = filepath .read_text (encoding = 'utf-8' )
791+ # Extract module docstring
792+ docstring_match = re .search (r'"""([^"]+)"""' , content )
793+ if docstring_match :
794+ docstring = docstring_match .group (1 ).strip ()
795+ lines = [line .strip () for line in docstring .split ('\n ' ) if line .strip ()]
796+ if lines :
797+ first_line = lines [0 ]
798+ if len (first_line ) > 20 and len (first_line ) < 200 :
799+ descriptions .append (first_line )
800+ except :
801+ pass
802+
803+ # 3. Check package.json description (JavaScript/Node)
804+ if metadata .primary_language in ['javascript' , 'typescript' ]:
805+ package_json = Path (repo_path ) / 'package.json'
806+ if package_json .exists ():
807+ try :
808+ data = json .loads (package_json .read_text ())
809+ if 'description' in data and data ['description' ]:
810+ descriptions .append (data ['description' ])
811+ except :
812+ pass
813+
814+ # 4. Intelligent description based on project type and features
815+ if not descriptions :
816+ if metadata .project_type == 'gui-application' :
817+ descriptions .append (f"Professional GUI application built with { metadata .primary_language .title ()} " )
818+ elif metadata .project_type == 'web-app' :
819+ frameworks = ', ' .join (metadata .frameworks ) if metadata .frameworks else metadata .primary_language .title ()
820+ descriptions .append (f"Web application built with { frameworks } " )
821+ elif metadata .project_type == 'cli-tool' :
822+ descriptions .append (f"Command-line tool for { metadata .name } operations" )
823+ elif metadata .project_type == 'library' :
824+ descriptions .append (f"{ metadata .primary_language .title ()} library for { metadata .name } functionality" )
825+
826+ # Set the best description
827+ if descriptions :
828+ # Prefer longer, more descriptive ones
829+ best_desc = max (descriptions , key = len )
830+ metadata .description = best_desc [:200 ] # Limit length
831+
832+ def _detect_advanced_project_features (self , repo_path : str , metadata : ProjectMetadata ):
833+ """Detect advanced project features by analyzing the codebase."""
834+ features = []
835+ repo_path_obj = Path (repo_path )
836+
837+ # Multi-platform and integration features
838+ if any ((repo_path_obj / f ).exists () for f in ['github/' , '.github/' , 'gitlab/' ]):
839+ features .append ("Multi-Platform Repository Analysis" )
840+
841+ # Intelligent analysis features
842+ frameworks_count = len (metadata .frameworks )
843+ if frameworks_count >= 3 :
844+ features .append ("Intelligent Technology Detection" )
845+
846+ # Template and generation features
847+ template_indicators = ['template' , 'generate' , 'readme' , 'doc' ]
848+ if any (keyword in metadata .name .lower () for keyword in template_indicators ):
849+ features .extend ([
850+ "Professional Templates" ,
851+ "Interactive GUI" ,
852+ "Batch Processing" ,
853+ "Real-time Preview" ,
854+ "Smart Caching" ,
855+ "Export Options"
856+ ])
857+
858+ # GUI application features
859+ if metadata .primary_language == 'python' and any (fw in ['tkinter' , 'pyqt' , 'pyside' , 'kivy' ] for fw in metadata .frameworks ):
860+ features .extend ([
861+ "User-friendly Interface with Modern Design Patterns" ,
862+ "Interactive GUI with Progress Tracking" ,
863+ "Real-time Analysis Updates with User Feedback"
864+ ])
865+
866+ # Web application features
867+ if any (fw in ['flask' , 'django' , 'fastapi' , 'express' , 'react' , 'vue' , 'angular' ] for fw in metadata .frameworks ):
868+ features .extend ([
869+ "RESTful API Integration" ,
870+ "Modern Web Interface" ,
871+ "Responsive Design"
872+ ])
873+
874+ # Architecture and system features
875+ if metadata .structure :
876+ structure_count = len (metadata .structure )
877+ if structure_count >= 4 :
878+ features .append ("Modular and Extensible Architecture" )
879+
880+ # Configuration and settings
881+ config_files = ['config/' , 'settings.py' , 'config.json' , 'config.yaml' , '.env' ]
882+ if any ((repo_path_obj / f ).exists () for f in config_files ):
883+ features .append ("Comprehensive Configuration System" )
884+
885+ # Logging and monitoring
886+ if any ((repo_path_obj / d ).exists () for d in ['logs/' , 'utils/logger.py' , 'logging/' ]):
887+ features .append ("Advanced Logging and Monitoring System" )
888+
889+ # Testing framework
890+ if metadata .has_tests :
891+ features .append ("Automated Testing Framework" )
892+
893+ # Documentation
894+ if metadata .has_docs :
895+ features .append ("Comprehensive Documentation" )
896+
897+ # CI/CD and DevOps
898+ if metadata .has_ci :
899+ features .append ("Continuous Integration Pipeline" )
900+
901+ if metadata .has_docker :
902+ features .append ("Containerization Support" )
903+
904+ # Performance and caching
905+ if any (keyword in str (repo_path_obj ).lower () for keyword in ['cache' , 'performance' , 'optimization' ]):
906+ features .append ("Performance Optimization and Caching" )
907+
908+ # Error handling and reliability
909+ error_handling_indicators = ['try' , 'except' , 'error' , 'exception' ]
910+ python_files = list (repo_path_obj .rglob ('*.py' ))[:10 ] # Check first 10 Python files
911+ has_error_handling = False
912+ for py_file in python_files :
913+ try :
914+ content = py_file .read_text (encoding = 'utf-8' , errors = 'ignore' )
915+ if any (keyword in content .lower () for keyword in error_handling_indicators ):
916+ has_error_handling = True
917+ break
918+ except :
919+ pass
920+
921+ if has_error_handling :
922+ features .append ("Robust Error Handling and Recovery" )
923+
924+ # Remove duplicates and limit features
925+ metadata .features = list (dict .fromkeys (features ))[:8 ]
926+
927+ def _generate_smart_installation_instructions (self , repo_path : str , metadata : ProjectMetadata ):
928+ """Generate intelligent installation instructions based on project setup."""
929+ instructions = []
930+ repo_path_obj = Path (repo_path )
931+
932+ # 1. Repository cloning (always first)
933+ if metadata .repository_url :
934+ instructions .append (f"git clone { metadata .repository_url } " )
935+ instructions .append (f"cd { metadata .name } " )
936+
937+ # 2. Language-specific setup
938+ if metadata .primary_language == 'python' :
939+ # Check for requirements.txt or pyproject.toml
940+ if (repo_path_obj / 'requirements.txt' ).exists ():
941+ instructions .append ("pip install -r requirements.txt" )
942+ elif (repo_path_obj / 'pyproject.toml' ).exists ():
943+ instructions .append ("pip install -e ." )
944+ elif (repo_path_obj / 'setup.py' ).exists ():
945+ instructions .append ("pip install -e ." )
946+ else :
947+ # Generate based on detected dependencies
948+ if metadata .dependencies .get ('pip' ):
949+ deps = metadata .dependencies ['pip' ][:5 ] # First 5 deps
950+ if deps :
951+ instructions .append (f"pip install { ' ' .join (deps )} " )
952+
953+ elif metadata .primary_language in ['javascript' , 'typescript' ]:
954+ if (repo_path_obj / 'package.json' ).exists ():
955+ instructions .append ("npm install" )
956+ elif (repo_path_obj / 'yarn.lock' ).exists ():
957+ instructions .append ("yarn install" )
958+
959+ # 3. Application startup
960+ if metadata .primary_language == 'python' :
961+ # Look for main entry point
962+ main_files = ['main.py' , 'app.py' , 'run.py' , f'{ metadata .name } .py' ]
963+ for main_file in main_files :
964+ if (repo_path_obj / main_file ).exists ():
965+ instructions .append (f"python { main_file } " )
966+ break
967+ else :
968+ # Default based on project type
969+ if metadata .project_type == 'web-app' :
970+ instructions .append ("python app.py" )
971+ else :
972+ instructions .append ("python main.py" )
973+
974+ elif metadata .primary_language in ['javascript' , 'typescript' ]:
975+ instructions .append ("npm start" )
976+
977+ metadata .installation_commands = instructions
978+
979+ def _extract_code_usage_examples (self , repo_path : str , metadata : ProjectMetadata ):
980+ """Extract usage examples from code comments, docstrings, and examples."""
981+ examples = []
982+ repo_path_obj = Path (repo_path )
983+
984+ # 1. Look for examples directory
985+ examples_dirs = ['examples' , 'example' , 'samples' , 'demo' , 'demos' ]
986+ for dirname in examples_dirs :
987+ examples_dir = repo_path_obj / dirname
988+ if examples_dir .exists () and examples_dir .is_dir ():
989+ py_files = list (examples_dir .glob ('*.py' ))[:3 ] # Max 3 examples
990+ for file_path in py_files :
991+ try :
992+ content = file_path .read_text (encoding = 'utf-8' )
993+ # Extract meaningful code blocks
994+ lines = content .split ('\n ' )
995+ code_block = []
996+ for line in lines [:20 ]: # First 20 lines
997+ if line .strip () and not line .strip ().startswith ('#' ):
998+ code_block .append (line )
999+ if len (code_block ) >= 3 :
1000+ examples .append ('\n ' .join (code_block [:10 ]))
1001+ except :
1002+ pass
1003+
1004+ # 2. Extract from main files (Python docstrings)
1005+ if metadata .primary_language == 'python' :
1006+ main_files = ['main.py' , 'app.py' , '__init__.py' ]
1007+ for filename in main_files :
1008+ filepath = repo_path_obj / filename
1009+ if filepath .exists ():
1010+ try :
1011+ content = filepath .read_text (encoding = 'utf-8' )
1012+ # Look for usage examples in docstrings
1013+ example_patterns = [
1014+ r'Example[s]?:?\s*\n((?:\s*[^\n]+\n)*)' ,
1015+ r'Usage:?\s*\n((?:\s*[^\n]+\n)*)' ,
1016+ r'```python\s*\n((?:[^\n`]+\n)*)' ,
1017+ ]
1018+ for pattern in example_patterns :
1019+ matches = re .findall (pattern , content , re .MULTILINE )
1020+ for match in matches [:2 ]: # Max 2 per file
1021+ cleaned = match .strip ()
1022+ if len (cleaned ) > 20 :
1023+ examples .append (cleaned )
1024+ except :
1025+ pass
1026+
1027+ # 3. Generate example based on project type
1028+ if not examples and metadata .project_type == 'gui-application' :
1029+ if metadata .primary_language == 'python' :
1030+ example = f"""# Launch the GUI application
1031+ from { metadata .name .lower ().replace ('-' , '_' )} import main
1032+
1033+ if __name__ == "__main__":
1034+ main()"""
1035+ examples .append (example )
1036+
1037+ metadata .usage_examples = examples [:3 ] # Limit to 3 examples
0 commit comments