Unwrap text for Import and reformat as table (1 Viewer)

BlueIshDan

☠
Local time
Today, 05:26
Joined
May 15, 2014
Messages
1,122


How does this look?
*NOTICE* ERR in table for question five is due to a duplicate answer letter. *NOTICE*

5. A query can do which of the following:
a. display less fields than the underlying table
b. make a table
c. update a table
*a. all of the above

*NOTICE*
Generated by this code:

Code:
Public Sub ImportQuestions(ByVal file As String)

    Dim fs As New FileSystemObject
    If Not fs.FileExists(file) Then: Exit Sub
    
    Dim rs As Recordset: Set rs = CurrentDb.OpenRecordset("questions")
    Dim str_line As Variant: Dim str_temp As String: Dim is_answer As Boolean
    Dim last_option As String: Dim first_record As Boolean: first_record = True
    
    For Each str_line In GetFileLines(file, True, False, False)
        
        str_temp = Trim(Left(str_line, 3))
        
        If InStr(1, str_temp, ".") + _
           InStr(1, str_temp, ")") = 0 Then
           
           rs(last_option) = rs(last_option) + " " + Trim(str_line)
           
        Else
            
            str_temp = Replace(str_temp, ".", vbNullString)
            str_temp = Replace(str_temp, ")", vbNullString)
            
            If IsNumeric(str_temp) Then
            
                If Not first_record Then
                    rs.Update
                Else: first_record = False
                End If
                
                rs.AddNew
                
                rs!question = Mid(str_line, 4)
                
            Else
            
                If InStr(1, str_temp, "*") > 0 Then
                
                    str_temp = Replace(str_temp, "*", vbNullString)
                    
                    rs!answer = str_temp
                    
                End If
                
                last_option = Trim(Replace(str_temp, Chr(9), vbNullString))
                
                rs(last_option) = Mid(str_line, 4)
                
            End If
            
        End If
        
    Next
    
    rs.Close
    
End Sub

Public Function GetFileLines(ByVal address As String, _
                             ByVal remove_blank_lines As Boolean, _
                             ByVal trim_lines As Boolean, _
                             ByVal keep_newline_char)
    
    ' keep_newline_char represents the Chr(byte) value of 10 and 13. These Bytes represent a NewLine.
    ' passing a true value to this parameter will cause returned lines to contain the new line value at the end of them.
    ' Use: True - Displaying data in a message
    '      False - Parsing the data line by line.
    
                             
    Dim fs As New FileSystemObject
    
    Dim arr_bytes() As Byte
    Dim file_node As Long
    Dim var_byte As Variant
    
    Dim lines() As String
    Dim line_count As Long: line_count = 0
    
    ReDim lines(line_count)
        
    If fs.FileExists(address) Then
    
        ReDim arr_bytes(FileLen(address))
        file_node = FreeFile
        
        Open address For Binary Access Read As file_node
            Get 1, , arr_bytes
        Close file_node
        
        
        For Each var_byte In arr_bytes
            
            If var_byte = 10 Or var_byte = 13 Then
            
                If trim_lines Then: lines(line_count) = Trim(lines(line_count))
                
                If remove_blank_lines And Trim(lines(line_count)) = "" Then
                    lines(line_count) = ""
                Else
                    If keep_newline_char Then: lines(line_count) = lines(line_count) & Chr(var_byte)
                    line_count = line_count + 1
                    ReDim Preserve lines(line_count)
                End If
                
            Else: lines(line_count) = lines(line_count) & Chr(var_byte)
            End If
            
            var_byte = Empty
            
        Next
        
    End If
    
    Set fs = Nothing
    
    GetFileLines = lines
    
    
End Function
 

Attachments

  • Capture.JPG
    Capture.JPG
    86.4 KB · Views: 315
Last edited:

vbaInet

AWF VIP
Local time
Today, 09:26
Joined
Jan 22, 2010
Messages
26,374
I was just working on something as well but BlueIshDan got here before me ;) But unlike Dan, I've left you a bit of work to do... see my notes. :)

Code:
Private Const NUM_FIRST_CHARS   As Byte = 6   ' Number of first characters to use in identifying the row
Private Const ASC_BEGIN         As Long = 96  ' Chr(96+1)="a", Chr(96+2)="b", etc. Used in counter for responses

Private Enum LineType
    Question = 1
    Response = 2
    Continuation = 3
End Enum

Private Type LineRecord
    RawString    As String      ' Raw text of the line read... will include question/response number
    DataString   As String      ' Trimmed text of the line read... will not include question/response number
    LineNumber   As Long        ' Line number... will not include empty lines
    RecordType   As LineType
    ReponseCount As Byte        ' Counter that will be used in conjunction with ASC_BEGIN as explained above
End Type


Public Function TransformFile(Source As String) As Boolean
' // Main function for transforming the file
' // Most things should be self-explanatory

    Dim fso                 As Scripting.FileSystemObject
    Dim fsoStream           As Scripting.TextStream
    Dim typCurrentLine      As LineRecord
    Dim typPreviousLine     As LineRecord
    
    Set fso = New FileSystemObject
    If Not fso.FileExists(Source) Then Exit Function
    Set fsoStream = fso.OpenTextFile(Source)
    
    ' Begin reading file
    With typCurrentLine
        Do While Not fsoStream.AtEndOfStream
            
            .LineNumber = fsoStream.Line
            .RawString = fsoStream.ReadLine
            
            If Len(.RawString) <> 0 Then
                .RecordType = IdentifyType(Left(.RawString, NUM_FIRST_CHARS))
                HandleData typCurrentLine, typPreviousLine
            End If
        Loop
    End With
    
    ' Save the last record
    HandleData typCurrentLine, typPreviousLine, True
    
    ' Tidy up
    fsoStream.Close
    Set fsoStream = Nothing
    Set fso = Nothing
    
    ' Send success signal
    TransformFile = True
    
End Function


Private Function HandleData(CurrentLine As LineRecord, _
                            PreviousLine As LineRecord, _
                            Optional Finalise As Boolean)
' // Function that handles saving/concatenating data where necessary
' // Also the CurrentLine.DataString variable should be used to store _
     the line text without the prefixed question number/response. _
     I.e. extract "computers" from "a. computers" before calling SaveData() _
     and save it into CurrentLine.DataString. _
     It is DataString you should be using in the SaveData() function not RawString
    
    If CurrentLine.LineNumber > 1 Then
        Select Case CurrentLine.RecordType
            
            Case LineType.Response
                SaveData PreviousLine
                CurrentLine.ReponseCount = CurrentLine.ReponseCount + 1
                PreviousLine = CurrentLine
                
            Case LineType.Question
                SaveData PreviousLine
                CurrentLine.ReponseCount = 0
                PreviousLine = CurrentLine
                
            Case LineType.Continuation
                ' Just concatenate
                PreviousLine.RawString = PreviousLine.RawString & " " & CurrentLine.RawString
                
                If Finalise Then
                    SaveData PreviousLine
                End If
        End Select
    Else
        ' Otherwise, it's at the beginning so simply "initiate" the previous line
        PreviousLine = CurrentLine
    End If
End Function


Private Sub SaveData(LineData As LineRecord)
' // Save the record
    
    With LineData
        If .RecordType = Response Then
            ' Save the response
            Debug.Print .RawString & vbTab & "||" & vbTab & _
                        .RecordType & vbTab & "||" & vbTab & _
                        Chr(ASC_BEGIN + .ReponseCount)  ' Use to order your responses
        Else
            ' Save the question
            Debug.Print .RawString & vbTab & "||" & vbTab & _
                        .RecordType
        End If
    End With
End Sub


Private Function IdentifyType(FirstChars As String) As LineType
' // Function Used to indicate whether the line is a question, response or continuation _
     by evaluating the first couple of characters
' // It handles all response variations and 1 to 999 questions in all it's variations too _
     "a. " or "*a. " or "*a) "... etc, and "1 to 999. " or "1 to 999) " inclusive of tabs _
     and it can be expanded too. But for more flexibility ...
' // ***** Convert this to a regular expression using VBScript's Regex *****
    
    If FirstChars Like "[a-d][.)] *" Or _
       FirstChars Like "[*][a-d][.)] *" Or _
       FirstChars Like "[a-d][.)]" & vbTab & "*" Or _
       FirstChars Like "[*][a-d][.)]" & vbTab & "*" _
    Then
        IdentifyType = Response
    ElseIf FirstChars Like "[1-9][.)] *" Or _
           FirstChars Like "[1-9]#[.)] *" Or _
           FirstChars Like "[1-9]##[.)] *" Or _
           FirstChars Like "[1-9][.)]" & vbTab & "*" Or _
           FirstChars Like "[1-9]#[.)]" & vbTab & "*" Or _
           FirstChars Like "[1-9]##[.)]" & vbTab & "*" _
    Then
        IdentifyType = Question
    Else
        IdentifyType = Continuation
    End If
End Function

To run:
Code:
?TransformFile("Path\To\File.txt")
 

BlueIshDan

&#9760;
Local time
Today, 05:26
Joined
May 15, 2014
Messages
1,122
I was just working on something as well but BlueIshDan got here before me ;) But unlike Dan, I've left you a bit of work to do... see my notes. :)

Nice work :D
 

sxschech

Registered User.
Local time
Today, 01:26
Joined
Mar 2, 2010
Messages
793
Wow, you guys have been busy. Really appreciate it. I've been trying out the code you provided, ran into some issues:

vbaInet: With a quick look, this is looking like it will work.

BlueIshDan: Since I did not have a table layout for the recordset you are calling "Questions", in looking through your code, I assumed it was a two col table with field names of Question, Answer. Is that correct? (Not that it matters for this part of the issue, table layout that data will eventually go into is under Phase 2 of original post) When I ran code after creating the Questions table, I stepped through the code and it generated error "3265 Items Not Found in collection" on this line

rs(Trim(Replace(str_temp, Chr(9), vbNullString))) = Mid(str_line, 4)

while processing the second line of the text file

"a. computers"


I have some other projects to take care of today, so I'll see if I can try again with what you both have provided either tonight or tomorrow, again, I really appreciate your efforts. When I get it working, I'll post the code.
 

BlueIshDan

&#9760;
Local time
Today, 05:26
Joined
May 15, 2014
Messages
1,122
... the picture shows you the table structure lol

Here is one of the table's design view.



As for the error. Are you using the same text file that I am? If not, send me your new one and I'll work through the new variation.
 

Attachments

  • questions_table.JPG
    questions_table.JPG
    19.6 KB · Views: 254

BlueIshDan

&#9760;
Local time
Today, 05:26
Joined
May 15, 2014
Messages
1,122
Now that I think of it, that error is being thrown because of a table design mismatch. The value you're not seeing is the processed string that is being passed as a key => value into the rs object.

Using that table design, the function will run smooth.

Also, I've found something in the code that could come up as a possible problem. The value processed accounting for the random Chr(9) special character was not being stored properly in the last_option variable which is used to append the page wrapped continuation value.

Here is the modified code that handles this situation properly:
Code:
Public Sub ImportQuestions(ByVal file As String)

    Dim fs As New FileSystemObject
    If Not fs.FileExists(file) Then: Exit Sub
    
    Dim rs As Recordset: Set rs = CurrentDb.OpenRecordset("questions")
    Dim str_line As Variant: Dim str_temp As String: Dim is_answer As Boolean
    Dim last_option As String: Dim first_record As Boolean: first_record = True
    
    For Each str_line In GetFileLines(file, True, False, False)
        
        str_temp = Trim(Left(str_line, 3))
        
        If InStr(1, str_temp, ".") + _
           InStr(1, str_temp, ")") = 0 Then
           
           rs(last_option) = rs(last_option) + " " + Trim(str_line)
           
        Else
            
            str_temp = Replace(str_temp, ".", vbNullString)
            str_temp = Replace(str_temp, ")", vbNullString)
            
            If IsNumeric(str_temp) Then
            
                If Not first_record Then
                    rs.Update
                Else: first_record = False
                End If
                
                rs.AddNew
                
                rs!question = Mid(str_line, 4)
                
            Else
            
                If InStr(1, str_temp, "*") > 0 Then
                
                    str_temp = Replace(str_temp, "*", vbNullString)
                    
                    rs!answer = str_temp
                    
                End If
                
                last_option = Trim(Replace(str_temp, Chr(9), vbNullString))
                
                rs(last_option) = Mid(str_line, 4)
                
            End If
            
        End If
        
    Next
    
    rs.Close
    
End Sub
 
Last edited:

sxschech

Registered User.
Local time
Today, 01:26
Joined
Mar 2, 2010
Messages
793
Thanks for the updated post BlueishDan, the code ran fine with the revisions and by adding the additional fields to the table as depicted in your post. In running both your code and vbaInet's, seems that vbaInet's is able to handle question 7 in the sample data, while your code (at least in my case) is not adding it to the table, only gets questions 1-6. I like that your code is smaller, but since vbaInet's code seems to be getting all the rows and outputing them in the format close to what I'm using, this will enable me to put the results in a new cleaned up text file which can then be used to import in with the phase 2 layout. I'm sure it could probably be done in one step, but since the process is already set up, might as well continue with it. As I'm working off site tomorrow, won't have a chance to try either version using the real text files, so will keep you posted once I do.
 

vbaInet

AWF VIP
Local time
Today, 09:26
Joined
Jan 22, 2010
Messages
26,374
I had a bit of time so I went ahead and added the bits that would extract the relevant parts and put them into their respective variables:

1. Question ID - saved in the QuestionID variable
2. Response ID - saved in the ResponseID variable
3. Data - saved in the DataString variable

So to save a reponse, just use the QuestionID, ResponseID and DataString variables.

Now the only enhancement I would suggest (as expressed in the comments in the code) is to convert the LIKE expressions to a singular regular expression for the question and response formats respectively. And of course add some error handling too. The main function TransposeText returns a True if it's success so you could also test against this.

You don't need to save it into another text file, the code was made so that you could save it using a recordset in the SaveData() sub.
Code:
Private Const ASC_BEGIN         As Long = 96  ' Chr(96+1)="a", Chr(96+2)="b", etc. Used in counter for responses

Private Enum LineType
    Question = 1
    Response = 2
    Continuation = 3
End Enum

Private Type LineRecord
    QuestionID   As String      ' Question number... will not include [.)] or space or tab
    ResponseID   As String      ' Response number... will not include [.)*] or space or tab
    RawString    As String      ' Raw text of the line read... will include question/response number
    DataString   As String      ' Trimmed text of the line read... will not include question/response number
    LineNumber   As Long        ' Current line number
    RecordType   As LineType    ' Indicates what type a line is using the LineType enum
    ReponseCount As Byte        ' Counter that will be used in conjunction with ASC_BEGIN as explained above
End Type


Public Function TransformFile(Source As String) As Boolean
' // Main function for transforming the file
' // Most things should be self-explanatory

    Dim fso                 As Scripting.FileSystemObject
    Dim fsoStream           As Scripting.TextStream
    Dim typCurrentLine      As LineRecord
    Dim typPreviousLine     As LineRecord
    
    Const NUM_FIRST_CHARS   As Byte = 6   ' Number of first characters to use in identifying the row
    
    Set fso = New FileSystemObject
    If Not fso.FileExists(Source) Then Exit Function
    Set fsoStream = fso.OpenTextFile(Source)
    
    ' Begin reading file
    With typCurrentLine
        Do While Not fsoStream.AtEndOfStream
            
            .LineNumber = fsoStream.Line
            .RawString = fsoStream.ReadLine
            
            If Len(.RawString) <> 0 Then
                HandleData typCurrentLine, typPreviousLine
            End If
        Loop
    End With
    
    ' Save the last record
    HandleData typCurrentLine, typPreviousLine, True
    
    ' Tidy up
    fsoStream.Close
    Set fsoStream = Nothing
    Set fso = Nothing
    
    ' Send success signal
    TransformFile = True
    
End Function


Private Sub HandleData(CurrentLine As LineRecord, _
                       PreviousLine As LineRecord, _
                       Optional Finalise As Boolean)
' // Function that handles saving/concatenating data where necessary
    
    ' Get the parts
    IdentifyLine CurrentLine
    
    ' Call Save or concatenate depending on type of line
    With CurrentLine
        If .LineNumber > 1 Then
            Select Case .RecordType
                Case LineType.Response
                    SaveData PreviousLine
                    .ReponseCount = .ReponseCount + 1
                    .ResponseID = Chr(ASC_BEGIN + .ReponseCount)
                    PreviousLine = CurrentLine
                
                Case LineType.Question
                    SaveData PreviousLine
                    .ReponseCount = 0
                    PreviousLine = CurrentLine
                    
                Case LineType.Continuation
                    ' Just concatenate
                    PreviousLine.DataString = PreviousLine.DataString & " " & .DataString
                    
                    If Finalise Then
                        SaveData PreviousLine
                    End If
            End Select
        Else
            ' Otherwise, it's at the beginning so simply "initiate" PreviousLine
            PreviousLine = CurrentLine
        End If
    End With
End Sub


Private Sub SaveData(LineData As LineRecord)
' // Save the record
    
    With LineData
        If .RecordType = Response Then
            ' Save the response
            Debug.Print .QuestionID & "." & .ResponseID & ": " & _
                        .DataString & vbTab & "||" & vbTab & _
                        .RecordType
        Else
            ' Save the question
            Debug.Print "Q." & .QuestionID & ": " & _
                        .DataString & vbTab & "||" & vbTab & _
                        .RecordType
        End If
    End With
End Sub


Private Sub IdentifyLine(CurrentLine As LineRecord)
' // Used to indicate whether the line is a question, response or continuation _
     by evaluating the first couple of characters _
     It also calls a function to extract the ID and Data String in two parts
' // It handles all response variations and 1 to 999 questions in all it's variations too. _
     "a. " or "*a. " or "*a) "... etc, and "1 to 999[.)] " inclusive of tabs _
     But for more flexibility ...
' // ***** Convert the LIKE expressions into a regular expression using VBScript's Regex *****
    Dim strFirstChars As String
    
    Const NUM_FIRST_CHARS   As Byte = 6                 ' Number of first characters to use in identifying the row
    Const QUEST_1           As String = "[1-9][.)]"
    Const QUEST_2           As String = "[1-9]#[.)]"
    Const QUEST_3           As String = "[1-9]##[.)]"
    Const RESP_1            As String = "[a-d][.)]"
    Const RESP_2            As String = "[*][a-d][.)]"
    
    With CurrentLine
        strFirstChars = Left(.RawString, NUM_FIRST_CHARS)
        
        Select Case True
            Case strFirstChars Like RESP_1 & " *" Or _
                 strFirstChars Like RESP_2 & " *" Or _
                 strFirstChars Like RESP_1 & vbTab & "*" Or _
                 strFirstChars Like RESP_2 & vbTab & "*"
                 
                 .RecordType = Response
                 ExtractRecord CurrentLine, strFirstChars
                 
            Case strFirstChars Like QUEST_1 & " *" Or _
                 strFirstChars Like QUEST_2 & " *" Or _
                 strFirstChars Like QUEST_3 & " *" Or _
                 strFirstChars Like QUEST_1 & vbTab & "*" Or _
                 strFirstChars Like QUEST_2 & vbTab & "*" Or _
                 strFirstChars Like QUEST_3 & vbTab & "*"
                 
                 .RecordType = Question
                 ExtractRecord CurrentLine, strFirstChars
            
            Case Else
                 .RecordType = Continuation
                 .DataString = .RawString
                 
        End Select
    End With
End Sub


Private Sub ExtractRecord(CurrentLine As LineRecord, _
                          ByVal FirstChars As String)
' // Extracts/re-formats the respective ID and DataString
    Dim intPos  As Long
    Dim strID   As String
    
    With CurrentLine
        FirstChars = Replace(FirstChars, vbTab, Space(1), , 1)
        
        intPos = InStr(1, FirstChars, Space(1))
        strID = Replace(Left(FirstChars, intPos - 2), "*", "")
        
        If .RecordType = Response Then
            .ResponseID = strID
        Else
            .QuestionID = strID
        End If
        
        .DataString = Mid(.RawString, intPos + 1)
    End With
End Sub
 

BlueIshDan

&#9760;
Local time
Today, 05:26
Joined
May 15, 2014
Messages
1,122
Whoops! Hahaha good catch. I forgot to save the last record before exiting the function.

Code:
Public Sub ImportQuestions(ByVal file As String)

    Dim fs As New FileSystemObject
    If Not fs.FileExists(file) Then: Exit Sub
    
    Dim rs As Recordset: Set rs = CurrentDb.OpenRecordset("questions")
    Dim str_line As Variant: Dim str_temp As String: Dim is_answer As Boolean
    Dim last_option As String: Dim first_record As Boolean: first_record = True
    
    For Each str_line In GetFileLines(file, True, False, False)
        
        str_temp = Trim(Left(str_line, 3))
        
        If InStr(1, str_temp, ".") + _
           InStr(1, str_temp, ")") = 0 Then
           
           rs(last_option) = rs(last_option) + " " + Trim(str_line)
           
        Else
            
            str_temp = Replace(str_temp, ".", vbNullString)
            str_temp = Replace(str_temp, ")", vbNullString)
            
            If IsNumeric(str_temp) Then
            
                If Not first_record Then
                    rs.Update
                Else: first_record = False
                End If
                
                rs.AddNew
                
                rs!question = Mid(str_line, 4)
                
            Else
            
                If InStr(1, str_temp, "*") > 0 Then
                
                    str_temp = Replace(str_temp, "*", vbNullString)
                    
                    rs!answer = str_temp
                    
                End If
                
                last_option = Trim(Replace(str_temp, Chr(9), vbNullString))
                
                rs(last_option) = Mid(str_line, 4)
                
            End If
            
        End If
        
    Next
    
[COLOR="Red"]    rs.Update[/COLOR]
    
    rs.Close
    
End Sub
 

sxschech

Registered User.
Local time
Today, 01:26
Joined
Mar 2, 2010
Messages
793
Hi BlueIshDan, spent some time with your code this morning and with a few modifications after running your procedure,

  • added code to change "7) " to "7. "
  • built a union query in order to use the data.
  • put back question number because otherwise, didn't have a way to know which was question 1, 2, etc, since can't rely on ID
Problem I'm encountering is a sequence issue when it is not a, b, c, d such as in question 5 where it looks like:
5.
a.
b.
c.
*a.

the "*a" should be "*d" (sequence a, b, c, d) I believe this situation was caused by Word when messing around with the auto numbering and user deletes or adds data.

Thus, for Question 5, program places data in col "a", overwriting the actual col "a" response.

I tried to modify the code so that if the response letter was less than the value the response letter should be, to increment it. So that "a" would become "d". I'm overlooking something because I get a run-time error 3265 not found in this collection.

Here is your code and my changes in red.

Code:
Public Sub ImportQuestions(ByVal file As String)

    Dim fs As New FileSystemObject
    If Not fs.FileExists(file) Then: Exit Sub
    
    Dim rs As Recordset: Set rs = CurrentDb.OpenRecordset("questions")
    Dim str_line As Variant: Dim str_temp As String: Dim is_answer As Boolean
    Dim last_option As String: Dim first_record As Boolean: first_record = True
    
    [COLOR=Red]Dim qno As Integer
    Dim qnoOld As Integer[/COLOR]
    
    [COLOR=Red]qnoOld = 1[/COLOR]
    
    For Each str_line In GetFileLines(file, True, False, False)
        
        str_temp = Trim(Left(str_line, 3))
        
        If InStr(1, str_temp, ".") + _
           InStr(1, str_temp, ")") = 0 Then
           
           rs(last_option) = rs(last_option) + " " + Trim(str_line)
           
        Else
            
            str_temp = Replace(str_temp, ".", vbNullString)
            str_temp = Replace(str_temp, ")", vbNullString)
            
            If IsNumeric(str_temp) Then
               [COLOR=Red] qno = str_temp[/COLOR]
                If Not first_record Then
                    rs.Update
                Else: first_record = False
                End If
                
                rs.AddNew
                
                rs!Question = Replace(str_line, ") ", ". ", 1, 1) 'Mid(str_line, 4)
                
            Else
 
                [COLOR=Red]If last_option > Mid(str_temp, 2, 1) And qno = qnoOld Then
                        str_temp = Replace(str_temp, Right(str_temp, 1), Chr(Asc(Right(last_option, 1)) + 1))
                End If[/COLOR]
                
                If InStr(1, str_temp, "*") > 0 Then
                    
                    
                        
                    str_temp = Replace(str_temp, "*", vbNullString)
                                            
                    rs!answer = str_temp
                        
                
                End If
                
                last_option = Trim(Replace(str_temp, Chr(9), vbNullString))
                
                rs(last_option) = Mid(str_line, 4)
                
               [COLOR=Red] qnoOld = qno[/COLOR]
            End If
            
        End If
        
    Next
    
    rs.Update
    
    rs.Close
    
End Sub
 

sxschech

Registered User.
Local time
Today, 01:26
Joined
Mar 2, 2010
Messages
793
vbaInet, with your code, having trouble figuring out where to put the code to either save the output as a file or to a table. Seems that I wouldn't put it in the Save data function as that doesn't need to be set up each time. I tried putting initializing and set(s) in the Transformfile Function and the rs!fieldname in the Savedata function, but getting Run-Tme error '424' Object Required.
 

vbaInet

AWF VIP
Local time
Today, 09:26
Joined
Jan 22, 2010
Messages
26,374
To keep things simple:

1. Declare the recordset (rst) and database (dbs) objects as global variables... the same section where this line is :
Code:
Private Const ASC_BEGIN

Private dbs As DAO.Database
Private rst As DAO.Recordset
2. Initialise the variables inside the TransformFile function, just after this line:
Code:
    Set fsoStream = fso.OpenTextFile(Source)

    Set dbs = CurrentDb
    Set rst = dbs.OpenRecordset(...)
3. Add your rs.Add and rs.Save calls in the SaveData function.

Makes sense?
 

sxschech

Registered User.
Local time
Today, 01:26
Joined
Mar 2, 2010
Messages
793
vbaInet, your suggestions worked thanks.

I'm trying to add one more thing to deal with question numbers such that each question will be renumbered incrementally so that if a number is out of sequence, it won't matter. I've spent the afternoon trying to figure out where to put it, I've gotten it to almost work, but the best I can to is have it increment after the question.

questionNumber text1 type
1 What is the most important thing to understand
1 a
1 b
1 c
1 d
1 When building a sample text file it is often required that the information be? <==This should be 2
2 a
2 b
2 c
2 d


After stepping through the code trying "", "a" and "d", using "a" got me the closest to the result, but misses the mark on the actual question. I also tried using the code directly in the SaveData sub, but it kept reseting to 0. Sorry to bother you so much.

Code:
Private Sub ExtractRecord(CurrentLine As LineRecord, _
                          ByVal FirstChars As String)
' // Extracts/re-formats the respective ID and DataString
    Dim intPos  As Long
    Dim strID   As String
    
    
    With CurrentLine
        FirstChars = Replace(FirstChars, vbTab, Space(1), , 1)
        
        intPos = InStr(1, FirstChars, Space(1))
        strID = Replace(Left(FirstChars, intPos - 2), "*", "")
        
        If .RecordType = response Then
            .ResponseID = strID
        Else
            .QuestionID = strID
        End If
        
        .DataString = Mid(.RawString, intPos + 1)
        
        [COLOR=Red]'If .qno <> Val(.QuestionID) And .ResponseID = "d" Then
        If Val(.QuestionID) = 1 And .ResponseID = "" Then
            .qno = .qno + 1
        ElseIf Val(.QuestionID) <> .qno And .ResponseID = "a" Then
            .qno = .qno + 1
        End If[/COLOR]

    End With
End Sub
 

vbaInet

AWF VIP
Local time
Today, 09:26
Joined
Jan 22, 2010
Messages
26,374
Yes I realised that getting grips with the code will be the first challenge but once you do it becomes easily adaptable.

Remember the counter that I'm using for the responses? If you follow the same pattern it will fix that problem. It's handled in the HandleData() function/sub. You'll obviously need to add a QuestionCount variable to the LineType type.

Try and see how you get on. If you're not successful I'll amend it.
 

sxschech

Registered User.
Local time
Today, 01:26
Joined
Mar 2, 2010
Messages
793
Just realized though, going forward resequencing will solve problems before they start, for the current exercise of finding out the problem files needing to be fixed, would probably want the question numbers to come in as they are since my report can identify if they are out of sequence. This may be something I'll look at later when I have time, as I'd probably need to add an option to resequence or not. If I have time tomorrow, I'll revisit, otherwise will get to it later, hope that if I have an issue at that point, still would be able to check in with you. Thanks much.
 

vbaInet

AWF VIP
Local time
Today, 09:26
Joined
Jan 22, 2010
Messages
26,374
I wonder why they're coming out in so many different formats. Try and convince the data providers to follow a consistent format.

Let us know how it goes anyway.
 

sxschech

Registered User.
Local time
Today, 01:26
Joined
Mar 2, 2010
Messages
793
Hi vbaInet, the files come/came in different formats because each instructor has there own style/preference. There is even one instructor who provided the information in a multi tabbed excel file. Perhaps if I knew how to better clean up bulleted list word docs, that would help a lot, but seems whenever I tried, the sequencing became out of whack. It would be nice to specify a default format, unfortunately, these are older files that may or may not have imported properly into the new online system, which is why need to go through each file and find out the problems such as hard return text wraps, spaces in the wrong places, and number/alpha increment issues, etc. Additionally, as I am only working on this as a temp, don't think I'll have much sway in requesting a standard and have gone beyond what they expected by building an access database to help me fix these issues rather than doing each file with a visual search only. So far, the combination of running my original import, which doesn't handle wrapping and incrementing the letters to find/identify the problems and then reimporting using the code you provided to clean it up is working pretty well. I'll need to look into some enhancements down the road, if I'm still here, to try out your suggestion about the numbering and unexpected spacing issues. Thanks again.
 

vbaInet

AWF VIP
Local time
Today, 09:26
Joined
Jan 22, 2010
Messages
26,374
Whether it's an Excel or Word file you should be able to save them as txt files, then read off text files, that's what the code uses.

As for any future modifications to identify other weird combinations this is where regular expressions comes in. Remember in my comments I mentioned that you should convert the LIKE expressions to regular expressions... that's why. Because the entire structure has already been created, the only part that will need tweaking is the LIKE/regular expressions.

The sequencing is trivial. Just replicate the ResponseCount variable in HandleData()
 

sxschech

Registered User.
Local time
Today, 01:26
Joined
Mar 2, 2010
Messages
793
Looks like I'll need to find time to learn regular expressions and experiment with finding tricky word docs to save as text and see how they import. In the quick test, had to delete the first few rows because they contained header information and blank lines. Regarding the excel file, can't really use them directly because the layout does not conform to the pattern - no abcd, no periods, no asterisks (used TRUE/FALSE), so would need to figure out another parsing scheme. Fortunately, so far, only one instructor gave excel, so not a big deal to manually reformat that one.
Encountered a new rare exception, an exam with several questions that go beyond d. Is this where reg expressions come in? The code you provided seems to stop at D for properly formatted questions, but will go beyond D for improperly formatted questions. I'll try to investigate the code later when I have some time, merely letting you know for interest.

I know why q17 gets merged with q16 is due to improper format -- no space between question number and question text, but why would that allow it to go beyond "D" while the properly formatted question 15 concatenates responses E-G onto response D.

Code:
15. This is a question with 7 responses from A to G
A. Most exams have 4 responses
B. This exam is an exception
C. This question has more than one correct answer
D. This response is the only one not correct
E. A & B
*F. A, B, & C
G. All of the above
16.  This question has the traditional 4 responses A to D
A. This is the first letter of the alphabet
B. This is the second letter of the alphabet
C. This is the third letter of the alphabet
*D. All of the above
17.A follow up question is useful
*A. When not sure if the question is understood
B. When the question is understood
C. Under no circumstances
D. When everything is understood perfectly
The result:

Code:
15. This is a question with 7 responses from A to G
a. Most exams have 4 responses
b. This exam is an exception
c. This question has more than one correct answer
d. This response is the only one not correct E. A & B *F. A, B, & C G. All of the above
16. This question has the traditional 4 responses A to D
a. This is the first letter of the alphabet
b. This is the second letter of the alphabet
c. This is the third letter of the alphabet
*d. All of the above 17.A follow up question is useful
*e. When not sure if the question is understood
f. When the question is understood
g. Under no circumstances
h. When everything is understood perfectly
 

sxschech

Registered User.
Local time
Today, 01:26
Joined
Mar 2, 2010
Messages
793
Guess I hit send to soon. I found the original document and it does only have A-D. However the questions were structured in such a way that the person who originally converted and uploaded the text gave them additional responses to help the question look like the word doc.

Code:
15. This is a question with combined responses      
     A. Most exams have 1 correct response      
     B. This exam is an exception      
     C. This question has more than one correct answer      
     D. This response is the only one not correct


     A. A
     B. A & B
    *C. A, B, & C
     D. All of the above
so the first set of A-D are the choices while the second set of A-D are the actual responses. I guess they figured the question would look cleaner and more like the original doc by making the question behave like a response. I guess this would be more of a human decision thing than something to be done programmatically.
 

Users who are viewing this thread

Top Bottom