File size: 10,036 Bytes
5948255
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import gradio as gr
import requests
from huggingface_hub import InferenceClient
import spaces
from citation_validator import validate_citation, format_authors

# Create icons on startup.
# This is best effort: a failure is reported on stdout but never stops the app
# from starting.
try:
    import subprocess
    import sys

    # Run the helper with the interpreter that is running this app instead of
    # whatever "python" happens to resolve to on PATH (it may be absent or
    # belong to a different environment). Fall back to "python" only if the
    # interpreter path could not be determined.
    subprocess.run([sys.executable or 'python', 'create_icons.py'], check=True)
    print("Icons created successfully")
except Exception as e:
    print(f"Could not create icons: {e}")

# Initialize Llama 4 client with Cerebras provider
model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
# Use Cerebras as the inference provider
client = InferenceClient(model=model_id, provider="cerebras")

def search_crossref(query):
    """Search the Crossref API for publication metadata.

    Input that starts with ``10.`` (after trimming surrounding whitespace) is
    treated as a DOI and looked up directly. Anything else is sent as a
    free-text query and only the first result is requested.

    Args:
        query: A DOI or free-text search terms (title, author, ...).

    Returns:
        The work record (a dict) for the DOI or for the first search hit.
        None when the query is empty, nothing matched, the response status is
        not 200, or the request failed for any reason.
    """
    from urllib.parse import quote

    try:
        query = (query or "").strip()
        if not query:
            # Nothing to look up; avoid a pointless network round trip.
            return None

        if query.startswith('10.'):
            # Keep "/" (the DOI prefix/suffix separator) but escape characters
            # such as "#", "?" or spaces that would otherwise break the path.
            url = f"https://api.crossref.org/works/{quote(query, safe='/')}"
            params = None
        else:
            # Let requests build the query string so the search terms are
            # URL-encoded properly.
            url = "https://api.crossref.org/works"
            params = {'query': query, 'rows': 1}

        response = requests.get(
            url,
            params=params,
            headers={'User-Agent': 'CitationTool/1.0 (mailto:your-email@example.com)'},
            timeout=10,  # never let a stalled connection hang the UI handler
        )

        if response.status_code != 200:
            return None

        data = response.json()
        if 'message' not in data:
            return None

        message = data['message']
        if 'items' in message:
            # Search response: a list of hits, possibly empty.
            items = message['items']
            return items[0] if items else None
        # DOI lookup: the message itself is the work record.
        return message
    except Exception as e:
        # Deliberately broad: callers treat None as "no metadata available"
        # and fall back to another path, so no failure may escape from here.
        print(f"Crossref API error: {e}")
        return None

def _intext_year(work):
    """Return the first usable publication year in a work record, or None."""
    for field in ('published-print', 'published-online', 'published', 'issued'):
        date_parts = (work.get(field) or {}).get('date-parts') or []
        # The year is read from date_parts[0][0]; any level may be empty or
        # null, so check each one before indexing.
        if date_parts and date_parts[0] and date_parts[0][0]:
            return date_parts[0][0]
    return None


def _intext_author_label(author):
    """Return the name to print for one author entry."""
    # Fall back to 'name' when there is no 'family' value (presumably how
    # organisational authors are recorded -- verify against Crossref data).
    return author.get('family') or author.get('name') or 'Unknown'


def generate_intext_citation(crossref_data, citation_style, page_numbers=""):
    """Generate an in-text citation from Crossref data.

    Args:
        crossref_data: A work record (dict) as returned by the Crossref
            lookup, or a falsy value when no record is available.
        citation_style: One of "APA7", "Chicago" or "MLA".
        page_numbers: Optional page locator such as "42" or "15-20".

    Returns:
        The parenthesised in-text citation string, or None when there is no
        record or the style is not one of the three supported ones.
    """
    if not crossref_data:
        return None

    work = crossref_data
    names = [_intext_author_label(author) for author in (work.get('author') or [])]
    year = _intext_year(work) or 'n.d.'
    pages = (page_numbers or "").strip()

    if citation_style == "APA7":
        if not names:
            who = "Unknown Author"
        elif len(names) == 1:
            who = names[0]
        elif len(names) == 2:
            who = f"{names[0]} & {names[1]}"
        else:
            who = f"{names[0]} et al."
        locator = ""
        if pages:
            # APA uses "pp." for a range or list of pages, "p." for one page.
            abbreviation = "pp." if any(mark in pages for mark in "-–—,") else "p."
            locator = f", {abbreviation} {pages}"
        return f"({who}, {year}{locator})"

    if citation_style == "Chicago":
        if not names:
            # No author: cite by (shortened) title instead.
            title = (work.get('title') or ['Unknown Title'])[0]
            who = title.split(':')[0][:30] + "..." if len(title) > 30 else title
        elif len(names) == 1:
            who = names[0]
        elif len(names) == 2:
            # Two authors are both named; "et al." is only for longer lists.
            who = f"{names[0]} and {names[1]}"
        else:
            who = f"{names[0]} et al."
        locator = f", {pages}" if pages else ""
        return f"({who}, {year}{locator})"

    if citation_style == "MLA":
        if not names:
            who = "Unknown Author"
        elif len(names) == 1:
            who = names[0]
        elif len(names) == 2:
            who = f"{names[0]} and {names[1]}"
        else:
            who = f"{names[0]} et al."
        locator = f" {pages}" if pages else ""
        return f"({who}{locator})"

    # Unsupported style: let the caller fall back to another strategy.
    return None

def _first_crossref_year(work):
    """Return the first usable publication year in a work record, or None."""
    for field in ('published-print', 'published-online', 'published', 'issued'):
        date_parts = (work.get(field) or {}).get('date-parts') or []
        # The year is read from date_parts[0][0]; any level may be empty or
        # null, so check each one before indexing.
        if date_parts and date_parts[0] and date_parts[0][0]:
            return date_parts[0][0]
    return None


@spaces.GPU
def generate_citation(input_text, citation_style, citation_type, page_numbers=""):
    """Generate a full or in-text citation for the given DOI, title or free text.

    The input is first looked up with ``search_crossref``. For in-text
    citations with a Crossref match, the citation is built locally by
    ``generate_intext_citation``. In every other case a prompt is sent to the
    chat model and the answer is checked with ``validate_citation``.

    Args:
        input_text: DOI, title or other citation information typed by the user.
        citation_style: Style name selected in the UI ("APA7", "Chicago", "MLA").
        citation_type: "Full Citation" or "In-Text Citation".
        page_numbers: Optional page locator for in-text citations.

    Returns:
        A string for display: the citation followed by a status line, a
        request to enter text, or an "Error generating citation: ..." message.
    """
    if not input_text or not input_text.strip():
        return "Please enter some text to generate a citation."

    # Trim once so DOI detection and the prompts see clean input.
    input_text = input_text.strip()
    page_numbers = (page_numbers or "").strip()

    # First, try to get structured data from Crossref
    crossref_data = search_crossref(input_text)

    # Generate in-text citation if requested
    if citation_type == "In-Text Citation" and crossref_data:
        intext = generate_intext_citation(crossref_data, citation_style, page_numbers)
        if intext:
            return intext + "\n✅ In-text citation generated"

    # Prepare the prompt for Llama 4 for full citations
    if crossref_data:
        # Extract and format key information from Crossref
        work = crossref_data
        authors = format_authors(work.get('author', []))
        title = work.get('title', [''])[0] if work.get('title') else ''
        journal = work.get('container-title', [''])[0] if work.get('container-title') else ''
        # Never put a literal "None" into the prompt when no date is recorded.
        year = _first_crossref_year(work) or 'n.d.'
        doi = work.get('DOI', '')

        if citation_type == "In-Text Citation":
            if citation_style == "MLA":
                prompt = f"""Create an MLA in-text citation using this data:
                Authors: {authors}
                Page numbers: {page_numbers}
                
                Generate only the in-text citation (e.g., (Author page) or (Author and Author page))."""
            else:
                prompt = f"""Create a {citation_style} in-text citation using this data:
                Authors: {authors}
                Year: {year}
                Page numbers: {page_numbers}
                
                Generate only the in-text citation (e.g., (Author, Year) or (Author, Year, p. X))."""
        else:
            prompt = f"""Create a perfect {citation_style} full reference citation using this verified publication data:
            Authors: {authors}
            Title: {title}
            Journal/Publication: {journal}
            Year: {year}
            DOI: {doi}
            
            Format this as a complete, properly formatted {citation_style} reference citation following all style guidelines exactly."""
    else:
        if citation_type == "In-Text Citation":
            prompt = f"""Create a {citation_style} in-text citation from this information: {input_text}
            Page numbers: {page_numbers}
            
            Generate only the in-text citation format. Follow {citation_style} guidelines exactly."""
        else:
            prompt = f"""Create a {citation_style} full reference citation from this information: {input_text}
            
            Follow {citation_style} formatting guidelines exactly. If information is missing, use appropriate placeholders like [Author] or [Year]."""

    try:
        # Use conversational approach with Cerebras
        messages = [{"role": "user", "content": prompt}]
        response = client.chat_completion(
            messages=messages,
            max_tokens=500,
            temperature=0.1
        )

        # Extract the citation from the response
        citation = (response.choices[0].message.content or "").strip()
        if not citation:
            # Report an empty answer explicitly instead of relying on a
            # TypeError raised further down.
            return "Error generating citation: the model returned an empty response"

        # Validate the generated citation
        errors = validate_citation(citation, citation_style)

        if errors:
            warning = "\n⚠️ Validation warnings:\n" + "\n".join(f"• {error}" for error in errors)
            return citation + warning
        return citation + "\n✅ Citation validated"

    except Exception as e:
        # UI boundary: surface any failure as text rather than crash the handler.
        return f"Error generating citation: {str(e)}"

# Build the Gradio UI: inputs in one column, the result box below, then examples.
with gr.Blocks(title="Citation Tool PWA") as app:
    gr.Markdown("# 📚 Citation Generation Tool")
    gr.Markdown("*AI-powered citations for APA7, Chicago, and MLA styles*")

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                lines=3,
                label="Enter DOI, title, or citation information",
                placeholder="e.g., 10.1038/nature12373 or 'The Structure of Scientific Revolutions'",
            )
            with gr.Row():
                citation_style = gr.Dropdown(
                    label="Citation Style",
                    choices=["APA7", "Chicago", "MLA"],
                    value="APA7",
                )
                citation_type = gr.Dropdown(
                    label="Citation Type",
                    choices=["Full Citation", "In-Text Citation"],
                    value="Full Citation",
                )
            # Hidden until the user switches to in-text citations (see the
            # change handler below).
            page_numbers = gr.Textbox(
                visible=False,
                label="Page numbers (for in-text citations)",
                placeholder="e.g., 15-20 or 42",
            )
            generate_btn = gr.Button("Generate Citation", variant="primary")

    with gr.Row():
        output = gr.Textbox(label="Generated Citation", lines=5, interactive=False)

    # Sample inputs shown to the user
    gr.Markdown("### Examples:")
    gr.Markdown("- **DOI**: `10.1038/nature12373`")
    gr.Markdown("- **Title**: `The Structure of Scientific Revolutions`")
    gr.Markdown("- **Author + Title**: `Kuhn Scientific Revolutions`")

    def toggle_page_numbers(citation_type):
        """Return an update that shows the page box only for in-text citations."""
        wants_intext = citation_type == "In-Text Citation"
        return gr.update(visible=wants_intext)

    # Re-evaluate the page box visibility whenever the citation type changes.
    citation_type.change(
        fn=toggle_page_numbers,
        inputs=[citation_type],
        outputs=[page_numbers],
    )

    # Run the generator with the current form values and show its result.
    generate_btn.click(
        fn=generate_citation,
        inputs=[input_text, citation_style, citation_type, page_numbers],
        outputs=output,
    )

if __name__ == "__main__":
    app.launch()